%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"
#include <wireshark.h>
}

/*
 * We want a reentrant scanner.
 */
%option reentrant

/*
 * We don't read interactively from the terminal.
 */
%option never-interactive

/*
 * We want to stop processing when we get to the end of the input.
 */
%option noyywrap

/*
 * The type for the state we keep for the scanner (and parser).
 */
%option extra-type="protobuf_lang_state_t *"

/*
 * Prefix scanner routines with "protobuf_lang_" rather than "yy", so this scanner
 * can coexist with other scanners.
 */
%option prefix="protobuf_lang_"

/*
 * We have to override the memory allocators so that we don't get
 * "unused argument" warnings from the yyscanner argument (which
 * we don't use, as we have a global memory allocator).
 *
 * We provide, as macros, our own versions of the routines generated by Flex,
 * which just call malloc()/realloc()/free() (as the Flex versions do),
 * discarding the extra argument.
 */
%option noyyalloc
%option noyyrealloc
%option noyyfree

/*
 * Have the scanner keep track of the current line number; every token
 * records it through protobuf_lang_get_lineno().
 */
%option yylineno

/*
 * Don't generate input() and unput(); nothing in this scanner calls them.
 */
%option noinput
%option nounput

%{
/* protobuf_lang_scanner.l
 *
 * C Protocol Buffers Language Lexer (for *.proto files)
 * Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "protobuf_lang_tree.h"
#include "protobuf_lang_parser.h"

/*
 * Disable diagnostics in the code generated by Flex.
 */
DIAG_OFF_FLEX()

/*
 * Sleazy hack to suppress compiler warnings in yy_fatal_error().
 *
 * The comma expression mentions yyscanner (cast to void, so it has no
 * effect) and then evaluates to 2, so wherever YY_EXIT_FAILURE is expanded
 * the yyscanner argument counts as referenced.
 */
#define YY_EXIT_FAILURE ((void)yyscanner, 2)

/*
 * Macros for the allocators, to discard the extra argument.
 *
 * Each one forwards to the matching C library routine and never expands
 * its yyscanner parameter.  Every other parameter and each value-producing
 * expansion is parenthesized, so the macros behave like function calls
 * whatever expression is passed in and wherever the result is used.
 */
#define protobuf_lang_alloc(size, yyscanner)           ((void *)malloc((size)))
#define protobuf_lang_realloc(ptr, size, yyscanner)    ((void *)realloc((char *)(ptr), (size)))
#define protobuf_lang_free(ptr, yyscanner)             free((char *)(ptr))

/*
 * NOTE(review): this is a non-static file-scope variable, so it is a single
 * slot shared by every scanner instance even though the scanner is built
 * with %option reentrant.
 */
int old_status;

/*
 * Extended error handling function defined in protobuf_lang_grammar.lemon.
 * Takes the parser state followed by a format string and variable
 * arguments (presumably printf-style; confirm against the definition).
 */
void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);

/*
 * Duplicate the text and keep the pointer in parser state for freeing later
 * automatically.  Defined at the end of this file; the value returned is
 * whatever pbl_store_string_token() hands back for the duplicate.
 */
static gchar*
strdup_and_store(void* yyscanner, const char* text);

/*
 * Build the token for the text that was just matched and return its type
 * from the scanner.
 *
 * A zero-filled protobuf_lang_token_t is allocated, remembered in the
 * scanner state's tmp_token and registered with pbl_store_struct_token();
 * it is then given a stored copy of yytext and the current line number.
 *
 * The body is wrapped in do { } while (0) so the expansion is always one
 * statement.  Because it ends in a return, nothing written after it in a
 * rule action is executed.
 */
#define PROTOBUF_LANG_PARSE(token_type)  \
    do { \
        protobuf_lang_state_t *pbl_state_ = protobuf_lang_get_extra(yyscanner); \
        pbl_state_->tmp_token = g_new0(protobuf_lang_token_t, 1); \
        pbl_store_struct_token(pbl_state_, pbl_state_->tmp_token); \
        pbl_state_->tmp_token->v = strdup_and_store(yyscanner, yytext); \
        pbl_state_->tmp_token->ln = protobuf_lang_get_lineno(yyscanner); \
        return (token_type); \
    } while (0)

%}

%x COMMENT

%%
    /* operations or symbols (PT_ means PBL Token) */
    /* Flex takes the longest match, so a two-character operator wins over its one-character prefix. */
    /* A lone '.' also fits the identifier rule further down; this rule is listed first and so wins that tie. */
"("                             PROTOBUF_LANG_PARSE(PT_LPAREN);
")"                             PROTOBUF_LANG_PARSE(PT_RPAREN);
"["                             PROTOBUF_LANG_PARSE(PT_LBRACKET);
"]"                             PROTOBUF_LANG_PARSE(PT_RBRACKET);
"{"                             PROTOBUF_LANG_PARSE(PT_LCURLY);
"}"                             PROTOBUF_LANG_PARSE(PT_RCURLY);
"=="                            PROTOBUF_LANG_PARSE(PT_EQUAL);
"!="                            PROTOBUF_LANG_PARSE(PT_NOTEQUAL);
"<>"                            PROTOBUF_LANG_PARSE(PT_NOTEQUAL2);
">="                            PROTOBUF_LANG_PARSE(PT_GEQUAL);
"<="                            PROTOBUF_LANG_PARSE(PT_LEQUAL);
"+="                            PROTOBUF_LANG_PARSE(PT_ASSIGN_PLUS);
"="                             PROTOBUF_LANG_PARSE(PT_ASSIGN);
"+"                             PROTOBUF_LANG_PARSE(PT_PLUS);
"-"                             PROTOBUF_LANG_PARSE(PT_MINUS);
"*"                             PROTOBUF_LANG_PARSE(PT_MULTIPLY);
"/"                             PROTOBUF_LANG_PARSE(PT_DIV);
"||"                            PROTOBUF_LANG_PARSE(PT_LOGIC_OR);
"|"                             PROTOBUF_LANG_PARSE(PT_OR);
"&&"                            PROTOBUF_LANG_PARSE(PT_LOGIC_AND);
"&"                             PROTOBUF_LANG_PARSE(PT_AND);
"!"                             PROTOBUF_LANG_PARSE(PT_NOT);
"~"                             PROTOBUF_LANG_PARSE(PT_NEG);
"^"                             PROTOBUF_LANG_PARSE(PT_XOR);
"<<"                            PROTOBUF_LANG_PARSE(PT_SHL);
">>"                            PROTOBUF_LANG_PARSE(PT_SHR);
"%"                             PROTOBUF_LANG_PARSE(PT_PERCENT);
"$"                             PROTOBUF_LANG_PARSE(PT_DOLLAR);
"?"                             PROTOBUF_LANG_PARSE(PT_COND);
";"                             PROTOBUF_LANG_PARSE(PT_SEMICOLON);
"."                             PROTOBUF_LANG_PARSE(PT_DOT);
","                             PROTOBUF_LANG_PARSE(PT_COMMA);
":"                             PROTOBUF_LANG_PARSE(PT_COLON);
"<"                             PROTOBUF_LANG_PARSE(PT_LESS);
">"                             PROTOBUF_LANG_PARSE(PT_GREATER);

    /* key words */
    /* Keep these above the identifier rule: on an equal-length match flex picks the rule listed first. */
    /* A keyword followed by more identifier characters is a longer match and is returned as PT_IDENT instead. */
syntax                          PROTOBUF_LANG_PARSE(PT_SYNTAX);
edition                         PROTOBUF_LANG_PARSE(PT_EDITION);
import                          PROTOBUF_LANG_PARSE(PT_IMPORT);
weak                            PROTOBUF_LANG_PARSE(PT_WEAK);
public                          PROTOBUF_LANG_PARSE(PT_PUBLIC);
package                         PROTOBUF_LANG_PARSE(PT_PACKAGE);
option                          PROTOBUF_LANG_PARSE(PT_OPTION);
required                        PROTOBUF_LANG_PARSE(PT_REQUIRED);
optional                        PROTOBUF_LANG_PARSE(PT_OPTIONAL);
repeated                        PROTOBUF_LANG_PARSE(PT_REPEATED);
oneof                           PROTOBUF_LANG_PARSE(PT_ONEOF);
map                             PROTOBUF_LANG_PARSE(PT_MAP);
reserved                        PROTOBUF_LANG_PARSE(PT_RESERVED);
enum                            PROTOBUF_LANG_PARSE(PT_ENUM);
group                           PROTOBUF_LANG_PARSE(PT_GROUP);
extend                          PROTOBUF_LANG_PARSE(PT_EXTEND);
extensions                      PROTOBUF_LANG_PARSE(PT_EXTENSIONS);
message                         PROTOBUF_LANG_PARSE(PT_MESSAGE);
service                         PROTOBUF_LANG_PARSE(PT_SERVICE);
rpc                             PROTOBUF_LANG_PARSE(PT_RPC);
stream                          PROTOBUF_LANG_PARSE(PT_STREAM);
returns                         PROTOBUF_LANG_PARSE(PT_RETURNS);
to                              PROTOBUF_LANG_PARSE(PT_TO);

    /* intLit values */
    /* A lone 0 fits both of the first two rules and is returned as PT_DECIMALLIT, the one listed first. */
    /* These rules precede the identifier rule, so they win when both match the same length of text. */
0|[1-9][0-9]*                   PROTOBUF_LANG_PARSE(PT_DECIMALLIT);
0[0-7]*                         PROTOBUF_LANG_PARSE(PT_OCTALLIT);
0[xX][0-9a-fA-F]+               PROTOBUF_LANG_PARSE(PT_HEXLIT);

    /* Using extended identifier because we care only about position */
    /* The pattern may start with a digit or '.', and may continue with '+' and '-' as well; */
    /* digits followed by such characters are a longer match and are returned as PT_IDENT. */
[a-zA-Z0-9_.][a-zA-Z0-9_.+-]*    PROTOBUF_LANG_PARSE(PT_IDENT);
    /* String literals in double or single quotes; a backslash-escaped closing quote is accepted, a raw newline is not. */
\"(\'|\\\"|[^\""\n"])*\"         PROTOBUF_LANG_PARSE(PT_STRLIT);
\'(\"|\\\'|[^\'"\n"])*\'         PROTOBUF_LANG_PARSE(PT_STRLIT);

    /* comments */
    /* Line comment: discard everything up to, but not including, the end of the line. */
"//"[^\r\n]*
    /* A block comment can only start in INITIAL: COMMENT is exclusive and is the only other start condition. */
    /* The closing rule therefore returns to INITIAL directly, keeping no state outside the scanner instance. */
"/*"                   { BEGIN COMMENT; }
<COMMENT>"*/"          { BEGIN INITIAL; }
    /* Discard the comment body: a run of characters other than '*', or else any single character. */
<COMMENT>([^*]|\n)+|.

    /* space & tab */
    /* Space, tab, carriage return and newline are discarded one character at a time. */
[ \t\r\n]
    /* prevent flex jam */
    /* Any other character is reported as an error; no token is returned, so scanning continues with the next character. */
.           { pbl_parser_error(protobuf_lang_get_extra(yyscanner), "unexpected token in proto file!\n"); }

%%

static gchar*
strdup_and_store(void* yyscanner, const char* text) {
    return pbl_store_string_token(protobuf_lang_get_extra(yyscanner), g_strdup(text));
}

/*
 * Turn diagnostics back on, so we check the code that we've written.
 */
DIAG_ON_FLEX()
